{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting gym\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/b3/99/7cc3e510678119cdac91f33fb9235b98448f09a6bdf0cafea2b108d9ce51/gym-0.17.2.tar.gz (1.6MB)\n",
      "\u001b[K     |████████████████████████████████| 1.6MB 914kB/s eta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied: scipy in ./anaconda3/lib/python3.7/site-packages (from gym) (1.4.1)\n",
      "Requirement already satisfied: numpy>=1.10.4 in ./anaconda3/lib/python3.7/site-packages (from gym) (1.17.2)\n",
      "Collecting pyglet<=1.5.0,>=1.4.0 (from gym)\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/70/ca/20aee170afe6011e295e34b27ad7d7ccd795faba581dd3c6f7cec237f561/pyglet-1.5.0-py2.py3-none-any.whl (1.0MB)\n",
      "\u001b[K     |████████████████████████████████| 1.0MB 1.5MB/s eta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied: cloudpickle<1.4.0,>=1.2.0 in ./anaconda3/lib/python3.7/site-packages (from gym) (1.2.2)\n",
      "Requirement already satisfied: future in ./anaconda3/lib/python3.7/site-packages (from pyglet<=1.5.0,>=1.4.0->gym) (0.17.1)\n",
      "Building wheels for collected packages: gym\n",
      "  Building wheel for gym (setup.py) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for gym: filename=gym-0.17.2-cp37-none-any.whl size=1650893 sha256=e62a4e6df1e57af2cafb4bfb6ce013f996e0c1e301479aa67d7e8183b8938770\n",
      "  Stored in directory: /home/ben/.cache/pip/wheels/87/e0/91/f56e44e8062f8cd549673da49f59e1d4fe8b17398119b1d221\n",
      "Successfully built gym\n",
      "Installing collected packages: pyglet, gym\n",
      "Successfully installed gym-0.17.2 pyglet-1.5.0\n"
     ]
    }
   ],
   "source": [
    "# Pinned: the training loop in this notebook relies on the pre-0.26 Gym API\n",
    "# (reset() returns the observation, step() returns a 4-tuple).\n",
    "%pip install gym==0.17.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch as T\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This is partly based on the (more sophisticated) implementation in the PyTorch examples repository:\n",
    "# https://github.com/pytorch/examples/blob/master/reinforcement_learning/reinforce.py\n",
    "class PolicyNetwork(nn.Module):\n",
    "    def __init__(self, lr, n_inputs, n_hidden, n_actions):\n",
    "        \"\"\"\n",
    "        A network that learns a policy, i.e. a mapping\n",
    "        from observations to action probabilities.\n",
    "\n",
    "        This sets up a two layer neural network,\n",
    "        one hidden, one output, where each neuron\n",
    "        in the output layer corresponds to one possible\n",
    "        action.\n",
    "\n",
    "        Parameters:\n",
    "        -----------\n",
    "        lr - learning rate of the Adam optimizer\n",
    "        n_inputs - number of values in one flattened observation\n",
    "        n_hidden - number of units in the hidden layer\n",
    "        n_actions - number of possible actions (output units)\n",
    "\n",
    "        \"\"\"\n",
    "        super(PolicyNetwork, self).__init__()\n",
    "        self.lr = lr\n",
    "        self.fc1 = nn.Linear(n_inputs, n_hidden)\n",
    "        self.fc2 = nn.Linear(n_hidden, n_actions)\n",
    "        self.optimizer = T.optim.Adam(self.parameters(), lr=self.lr)\n",
    "\n",
    "        self.device = T.device(\n",
    "            'cuda:0'\n",
    "            if T.cuda.is_available()\n",
    "            else 'cpu:0'\n",
    "        )\n",
    "        self.to(self.device)\n",
    "\n",
    "    def forward(self, observation):\n",
    "        \"\"\"\n",
    "        Compute the action probabilities for a single observation.\n",
    "\n",
    "        Parameters:\n",
    "        -----------\n",
    "        observation - numpy array of any shape holding n_inputs\n",
    "            values in total; it is flattened and cast to float32\n",
    "\n",
    "        Returns:\n",
    "        --------\n",
    "        1-d tensor of length n_actions whose entries sum to one\n",
    "        \"\"\"\n",
    "        # Flatten exactly once and keep the result: fc1 expects a vector\n",
    "        # of n_inputs features and the softmax below normalises over the\n",
    "        # only (action) axis of the output.\n",
    "        x = T.Tensor(\n",
    "            observation.reshape(-1).astype('float32'),\n",
    "        ).to(self.device)\n",
    "        x = F.relu(self.fc1(x))\n",
    "        x = F.softmax(self.fc2(x), dim=0)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Agent:\n",
    "    # Smallest float32 increment; keeps the return normalisation\n",
    "    # from dividing by zero.\n",
    "    eps = np.finfo(np.float32).eps.item()\n",
    "\n",
    "    def __init__(self, env, lr, params, gamma=0.99, epsilon=0.1):\n",
    "        \"\"\"\n",
    "        Agents evaluate policies to take actions,\n",
    "        and get rewards.\n",
    "\n",
    "        Parameters:\n",
    "        -----------\n",
    "        env - environment whose reset() returns an observation and\n",
    "            whose step(action) returns (observation, reward, done, info)\n",
    "        lr - learning rate passed on to the policy network\n",
    "        params - keyword arguments for PolicyNetwork\n",
    "        gamma - the discount factor\n",
    "        epsilon - stored for interface compatibility only; actions\n",
    "            are always sampled from the policy\n",
    "\n",
    "        \"\"\"\n",
    "        self.env = env\n",
    "        self.gamma = gamma\n",
    "        self.epsilon = epsilon\n",
    "        self.actions = []\n",
    "        self.rewards = []\n",
    "        self.policy = PolicyNetwork(\n",
    "            lr=lr,\n",
    "            **params\n",
    "        )\n",
    "\n",
    "    def run(self):\n",
    "        \"\"\"\n",
    "        Play one episode and apply one policy-gradient update.\n",
    "\n",
    "        Returns:\n",
    "        --------\n",
    "        the number of steps the episode lasted\n",
    "        \"\"\"\n",
    "        # Start from a clean slate so that log-probabilities of\n",
    "        # earlier episodes are not kept alive forever.\n",
    "        self.actions = []\n",
    "        probs = []\n",
    "        rewards = []\n",
    "        done = False\n",
    "        # Reset exactly once per episode.\n",
    "        observation = self.env.reset()\n",
    "        while not done:\n",
    "            action, prob = self.choose_action(observation)\n",
    "            probs.append(prob)\n",
    "            observation, reward, done, _ = self.env.step(action)\n",
    "            rewards.append(reward)\n",
    "        self.rewards = rewards\n",
    "        t = len(rewards)\n",
    "\n",
    "        # Discounted returns, accumulated back to front and reversed\n",
    "        # afterwards (linear instead of quadratic in episode length).\n",
    "        returns = []\n",
    "        R = 0.0\n",
    "        for r in reversed(rewards):\n",
    "            R = r + self.gamma * R\n",
    "            returns.append(R)\n",
    "        returns.reverse()\n",
    "        # Explicit float dtype: integer rewards would otherwise yield an\n",
    "        # int64 tensor, for which mean() and std() are undefined.\n",
    "        returns = T.tensor(\n",
    "            returns, dtype=T.float32, device=self.policy.device\n",
    "        )\n",
    "        # The standard deviation of a single value is NaN, which would\n",
    "        # poison the network weights; only standardise longer episodes.\n",
    "        if returns.numel() > 1:\n",
    "            returns = (returns - returns.mean()) / (returns.std() + self.eps)\n",
    "\n",
    "        if probs:\n",
    "            policy_loss = -(T.stack(probs, 0) * returns).sum()\n",
    "            self.policy.optimizer.zero_grad()\n",
    "            policy_loss.backward()\n",
    "            self.policy.optimizer.step()\n",
    "\n",
    "        return t\n",
    "\n",
    "    def choose_action(self, observation):\n",
    "        \"\"\"choose an action given an observation\n",
    "\n",
    "        The action is sampled according to the categorical\n",
    "        distribution from our network.\n",
    "\n",
    "        Returns:\n",
    "        --------\n",
    "        (action, log_prob) - the sampled action as a plain Python\n",
    "        number and the log-probability tensor of that action\n",
    "        \"\"\"\n",
    "        # Call the module itself rather than .forward() so that\n",
    "        # registered hooks are honoured.\n",
    "        output = self.policy(observation)\n",
    "        action_probs = T.distributions.Categorical(output)\n",
    "        action = action_probs.sample()\n",
    "        log_prob = action_probs.log_prob(action)\n",
    "        self.actions.append(log_prob)\n",
    "        return action.item(), log_prob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gym\n",
    "\n",
    "# Create the environment and override its private episode-length limit.\n",
    "env = gym.make('CartPole-v1')\n",
    "env._max_episode_steps = 10000\n",
    "\n",
    "# Number of actions and number of values in one flattened observation.\n",
    "n_actions = env.action_space.n\n",
    "input_dims = env.observation_space.low.size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gym\n",
    "\n",
    "# Alternative environment (not used here): gym_tetris.tetris_env.TetrisEnv()\n",
    "env = gym.make('CartPole-v1')\n",
    "env._max_episode_steps = 10000\n",
    "\n",
    "# Number of actions and number of values in one flattened observation.\n",
    "n_actions = env.action_space.n\n",
    "input_dims = env.observation_space.low.size\n",
    "\n",
    "agent = Agent(\n",
    "    env=env,\n",
    "    lr=0.01,\n",
    "    gamma=0.99,\n",
    "    params={\n",
    "        'n_inputs': input_dims,\n",
    "        'n_hidden': 10,\n",
    "        'n_actions': n_actions,\n",
    "    },\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Stopping. Iteration 76, average score: 145.316\n"
     ]
    }
   ],
   "source": [
    "update_interval = 100\n",
    "\n",
    "scores = []\n",
    "score = 0\n",
    "n_episodes = 25000\n",
    "stop_criterion = 1000\n",
    "for i in range(n_episodes):\n",
    "    # Periodic progress report over the most recent episodes. The mean\n",
    "    # is only computed when needed, never on an empty score list.\n",
    "    if (i>0) and (i % update_interval) == 0:\n",
    "        mean_score = np.mean(scores[-update_interval:])\n",
    "        print('Iteration {}, average score: {:.3f}'.format(\n",
    "            i, mean_score\n",
    "        ))\n",
    "\n",
    "    score = agent.run()\n",
    "    scores.append(score)\n",
    "    if score >= stop_criterion:\n",
    "        # Recompute the average so that it includes the final episode.\n",
    "        mean_score = np.mean(scores[-update_interval:])\n",
    "        print('Stopping. Iteration {}, average score: {:.3f}'.format(\n",
    "            i, mean_score\n",
    "        ))\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0, 0.5, 'scores')"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAEQCAYAAACJLbLdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VOXZ//HPlZ2dsO9EREBAQVlU3LdatVqtdasb1q120z62trb9Vfs83axP3R9XarXu1H2tRapSQIGAoCDIHghbyELIOpOZuX9/nDNxskECmcwkfN+v17xO5txn5lwTQq7cuznnEBERiZeURAcgIiIdmxKNiIjElRKNiIjElRKNiIjElRKNiIjElRKNiIjElRKNiIjElRKNiIjElRKNiIjEVVqiA0gGffr0cTk5OYkOQ0SkXVm8eHGhc67v3q5TogFycnLIzc1NdBgiIu2KmeU15zo1nYmISFwp0YiISFwp0YiISFwp0YiISFwp0YiISFwp0YiISFwp0YiISFwp0YiIHIAWbijm7lmrCYYicb+XEo2IyAFo0cZi7p+9pk3upUQjInIACtSEAUhPtbjfS4lGROQAFAhHyExLwUyJRkRE4iAYipCR1jYpQIlGROQAFAx5NZq2oEQjInIACoYiZKQq0YiISJwEQhEy01Pb5F5KNCIiByDVaEREJK6CYQ0GEBGRONKoMxERiatAKKymMxERiZ9gKEJmuhKNiIjESUCDAUREJJ40GEBEROIqUKNEIyIicRQMR8hM04RNERGJE611JiIicaV5NCIiElfBsEadiYhInITCEcIRpxqNiIjERzAcAVAfjYiIxEcw5CUa1WhERCQulGhERCSuAtFEo8EAIiISD4GOWKMxsxlmttLMys2syMzeMbPx9a650MxWmFnAzDaa2a31ynPM7HX/PUrNbKaZDYgpTzGzO8ws33+PpWZ2Vlt8PhGR9iTadNbRVga4BtgFPA/sBs4E/mlmWQBmdgzwIjAMeAFIA+40sxv88hTgbeBcYB7wKXAh8GrMPW4Fbgdq/PcYA7xhZuPi/eFERNqTjjrqbLJz7hjn3HXAyf65wcBY/+ufAwbc4Zy7CrjKP3+bfzzXv/Zz59wZwKlAHnC0mZ1kZmnAT/1rv+2/x11AKvCzOH4uEZF2p0MOBnDOLY55muEfw8A2/+sj/GNuveNwM+sZU77Yf78wXq0GYCIwFOgNRIAl9d5jYmMxmdn1ZpZrZrk7d+5s8WcSEWmvAqEw0MESTZSZdQX+5j+92zkXTTT9/WO5f6yIedmARspjr4ktr3TOuUbKG3DOPeacm+ycm9y3b9+WfRARkXbsqz6aDpZozKwv8AEwDXgcr7ksaod/7FrvCLC9kfLYr2PLO/v9OfXLRUTE1yGbzsxsODAXmAz80Tl3fUzNA2Cpf5zqH6f4x03OuV0x5VPMkwoc6Z9bBmwGivE+z6R677GsVT+MiEg7Fx0M0FbzaNLa5C4wHxgEbMKrddzrn3/OObcQ+DNwDnC7P+z5NL/8T/7xdWAVMA54D8jE65dZ6Jz7AMDM/gL8HviHmc0BLsLrB7orzp9NRKRdCdR0wBoNXpIBb/jyTTGPsQDOuXnApXiJ6FK8BHEb8IhfHgHOAt7Ca3o7EngZOD/mHncCvwPSgUuAL4HznHPL4/i5RETanUC4bRNNm9RonHPWjGtexJtL01T5BrxaT1PlYeD/+Q8REWlCR52wKSIiSaLDjjoTEZHkUDuPRotqiohIPARDEdJSjJSUvfZqtAolGhGRA0wwFGmzZjNQohEROeAEw5E2G3EGSjQiIgecYEiJRkRE4iigRCMiIvEUDEXabMQZKNGIiBxwAqFIm03WBCUaEZEDjgYDiIhIXAVqwko0IiISP8Gw5tGIiEgcacKmiIjElebRiIhIXAU0vFlEROJJNRoREYkrDW8WEZG4CmrCpoiIxJOazkREJG4iEec1
nWkwgIiIxEMwHAFQjUZEROIjmmg0YVNEROIiGFKiERGROAqE1HQmIiJxFFSiERGReKpNNKmaRyMiInGgPhoREYmrQCgMqOlMRETiRH00IiISVwFN2BQRkXhSH42IiMRVh040ZnazmX1mZmEzc2Z2R0zZdP9c/cfkmGsmmNkHZlZlZkVm9riZdYspzzKzB8yswL9mnpkd1VafT0SkPQgkYHhzWpvdCSYBxcBmYHgT18wCvoh5vgPATyizgL7Ay8BBwLVAV+BS/9p7gRuA5cBs4GJglpmNcM4VtuonERFppxIxGKDNEo1z7goAM3uNphPNc865Jxs5fw1eknnLOfdtM+sK7AQuMrNfAeXAd4EIcKpzrsDMQsDlwA+BO1rzs4iItFdBDW/mPr/Za5WZ3RRz/gj/mAvgnCsHVuHFfzgwDkgHNjnnCmKvBSbGP2wRkfYhEas3t2XT2Z5EgEXAMqA3cC5wr5lVOeceA/r715XHvKbCPw4AsvZS3oCZXQ9cDzBs2LD9jV9EpF0I1By4w5ufds5Ndc5d55z7FnCXf/4C/7jDP3aNeU306+3NKG/AOfeYc26yc25y37599y96EZF2IhiOYAZpKdZm90yWRHNwE+cj/nGpf5wKtYMDxgAO+BxvAEENMMzMorWfKf5xWatHKyLSTgVD3jbOZm2XaNqs6czMrgWOA470T51nZjnAa8DNZtYLr/ksG6/pDOBZ/zgD+BVwlpm9BIwAMoGZzrl1/vs/CVwHzDaz5cBFeE1pD8b1g4mItCOBUKRNm82gbftojgOuink+wX9sBJ7B6y+5AEgFPgPuc849A+CcKzOz04F7gLOBKuAJ4Ccx73cTXq3mImAk8Alwi3NuZ/w+kohI+xIMR8hMa7s5NNC2w5unA9P3cMmMvbz+U+CkPZRXAT/wHyIi0ohATaRNR5xB8vTRiIhIGwiG277pTIlGROQAEgyFyUhVohERkTgJhiJkpivRiIhInAT84c1tSYlGROQAEkzA8OZ9vpuZjfDnwYiISDuR1IMBzOx5M5vmf301sAJYYWbXxCs4ERFpXcEkbzo7la9WRP4v4DS8JWF+0dpBiYhIfARCETLTk3fCZoZzLmhmg4Fezrl5ADFri4mISJJLRI2mJYlmqZndhrdp2dsAftLZHY/ARESk9SVirbOW3O0a4DCgE/Br/9wxfLXwpYiIJLlgKNzmS9A0u0bjr5L8nXrnXgJeau2gREQkPrxFNZO0RmOe68xstpl95p87wcwuil94IiLSWpxzSd909t94zWePA9G9j/OBn7d2UCIi0vpCEYdzJPXw5unAN5xzL+DtbAmwAW8TMhERSXLBkLdpcTLXaFLxdqyErxJN15hzIiKSxNpDonkXuNvMMsHrswH+B3gzHoGJiEjrCviJpq132GxJovkJMBAoBXrg1WSGoz4aEZF2IVE1mmYNb/ZrL32AC4FeeAlms3NuexxjExGRVhQMh4EkTTTOOWdmnwPdnHMFQEF8wxIRkdYWbTpL5lFnnwKj4hWIiIjEV20fTRvvsNmStc4+BP5pZk8Cm/lq5BnOuSdaNywREWlt0T6azCReVPNYvHkzJ9Y77wAlGhGRJJfUgwEAnHMnxzMQERGJr6RPNABmlg2cAwwGtgBvOudK4hGYiIi0rmA4ySdsmtkxwDrge8DhwA3AOv+8iIgkuUDIG97c1hM2W1KjuRf4vr/WGQBmdjFwPzCltQMTEZHW1R6WoBkFzKx37iVgZOuFIyIi8RJsB/No1gCX1Dt3IV5zmoiIJLlAOxgMcDPwlpn9GMgDcoBDgG/EIS4REWllXy2qmaSJxjk338wOBs4GBuGt2vyOc644XsGJiEjrSVTTWbMTjZkNBiqdc8/EnMs2s0HOua1xiU5ERFpNMBwhPdVISbE2vW9L0tprwJB654YAr7ZeOCIiEi/BUKTNazPQwlFnzrnPY0/4z8c058VmdrOZfWZmYTNzZnZHvfILzWyFmQXMbKOZ3VqvPMfMXjez
cjMrNbOZZjYgpjzFzO4ws3z/PZaa2Vkt+HwiIh1aMBRp84EA0LJEs9PM6gxl9p8XNfP1k4BivAU56/Anfb4IDANewGvSu9PMbvDLU4C3gXOBeXgrSV9I3drUrcDtQI3/HmOAN8xsXDPjExHp0AKhcJtP1oSWJZongJfN7BtmNtbMzgFeBmY058XOuSuccycBSxsp/jlgwB3OuauAq/zzt/nHc4GxwOfOuTOAU/FGvh1tZieZWRrwU//ab/vvcReQCvysBZ9RRKTDSlSNpiXDm/+EV1v4X2AosAkvydzTCnEc4R9z6x2Hm1nPmPLFAM65sJl9irfT50S8pNMbiABL6r3HxFaIT0Sk3QuGk7/p7ETgJefcGLzVABYB44F+rRBHf/9Y7h8rYsoGNFIee01seaVzzjVS3oCZXW9muWaWu3Pnzn0OXESkvWgPgwEeAsL+13/Bqw1FgMdaIY4d/rFrvSPA9kbKY7+OLe/s9+fUL2/AOfeYc26yc25y37599zlwEZH2ItAOms4GO+c2+f0hX8fruA8CrTGHZqn/flOBj/hqkc5NzrldZhbt15liZoaXII/0zy3DG2BQDPTCG3SwKOY9lrVCfCIi7V4gFGnzVQGgZYlmt5n1x2suW+GcKzezDCC9OS82s2uB4/gqQZxnZjl483P+jLfPze1mNh44zb/mT/7xdWAVMA54D8jE6yda6Jz7wH//vwC/B/5hZnOAi/BqYHe14DOKiHRYwVCEblkt2oasVbQktT2AV1N4Fvg//9yxeAmgOY7DG0021H8+wX8+0Tk3D7gUb4DBpXgJ4jbgEQDnXAQ4C3gLmIaXrF4Gzo95/zuB3+ElvkuAL4HznHPLW/AZRUQ6rGCy12icc3ea2atA2DkXXbF5C3BtM18/HZi+h/IX8ebSNFW+Aa/W01R5GPh//kNEROoJhMJJ30eDc271np6LiEjyCoYjST9hU0RE2rH2MLxZRETasfaw1pmIiLRjSjQiIhJXiZqwqUQjInIAiEQcoYhLyPBmJRoRkQNAMOxv46xEIyIi8RAI+YlGo85ERCQeAiFvTWQ1nYmISFwE/RqNJmyKiEhcRBON+mhERCQuNBhARETiKlCjwQAiIhJHqtGIiEhcfTUYQIlGRETiQIMBREQkrgJKNCIiEk+asCkiInGlCZsiIhJXGnUmIiJxFdSimiIiEk8aDCAiInFVUhkkPdXISlcfjYhIh1ATjlBcEUx0GLXyCisZ2qszqSnW5vdWohERiYOHPljHaXd/RCTiEh0KABuLKjiod5eE3FuJRkQkDmat3E5xRZCCskCiQ8E5x8aiCoYr0YiIdAzFFUFWbN0NQH5JZYKjgR27A1TXRDioT+eE3F+JRkSklX28rgjnt5htToJEs7GoAkA1GhGRjmLu2kK6ZHiju/KLqxIcDWws9BLNQX2UaEREOoR5awuZNrIPfbtlkl+SBImmqJL0VGNgj6yE3F+JRkSkEdtLq/nPmp1NlgdDEQrLG3b0byqqZFNxJceN7MOQ7E7k70qCprPCCob26kxaAlYFACUaEZFGPTpnHVc9sZCCsupGy++etZqT7/qQonrJZu7aQgCOHdmHIdmdk6RGU0FOgvpnQIlGRKRRm4sriTh4+7NtDcpC4QgvL8mnLBDisf+sr1M2b20hA7pncXDfLgzN7sTWXVWEEziXxjlHXlGlEk2UmX1oZq7eY3lM+Q/MbJ2ZBczsSzO7qt7rJ5jZB2ZWZWZFZva4mXVr+08iIu1dtCby+tKtDcrmrytiZ1mAwT078ff5ebW1mkjEMW9dIccd0gczY0h2Z2rCjh27G68VtYWCsgBVNWFyEjS0GZIs0cS4L+bxNICZXQI8CHQDngf6Ak+a2Rl+eTdgFnAS8DawEbgWeKxtQxeRjmDLrio6paeydPMuNhXV7Wd5bekWumWl8fiVkwmEwjz+nw0AfLFtN7sqazhuZB8AhmR3Akho81l0xJlqNPU4526Oedzpn/6Ff7zROTcd+Jn/
/Db/eA1e8nnLOfdt4ESgGrjIzEa0Uegi0gGUVtVQVh3i4ilDAXhj2ZbasspgiPeWb+fswwYydlB3zpkwiL9/vJGi8kBt/8y0kb2B2ESTuAEB0Tk0SjT1mFmJme0ys9lmNsXM0oDxfnFuveNE/3hE7HnnXDmwCu8zHt4GYYtIB7HFr4FMPagXU3KyeX3pVpw/A3PWFzuoCIb55sTBAPzolEOoqvFqNXPXFDK6fzf6dfOGEQ/qmQQ1Gn9o86CeiRnaDMmXaMqAt4AXgTzgFOA9oA8QXdu63D9W+MceZpYF9K9XHnvNgPo3MrPrzSzXzHJ37mx6CKOIHHiiNZDBPTtx7sTBrCkoZ9X2MgBe+3QLA3tkcdRBvQAY2a8r5/q1mkUbiznWbzYDyEpPpX/3TDYXJ7BGU1jB0OzEDW2G5Es05zrnznHOfQ+YgpdssoHTgbB/Tdd6x1LnXDWwo9752K+317+Rc+4x59xk59zkvn37tuZnEJF2bssurwYyJLsTZ40fQGqK8frSrRSVB5izppBvThxMSsxy+z86ZSRVNWECoQjHH9KnznsleojzxqJKchK0IkBU0iQaM+sMDGyiOAis8L+e6h+n+Mdl/nFpbLk/OGAM4IDPWzVYEenQtpRUkZWeQq8uGfTumsnxh/ThzWVbeWPZVsIRx3lHDKpz/ch+3Tjn8EFkpqUw1a/pRCVy0qY3tLmC4b0TN+IMkijRAP2ADWb2rpk9AiwChuPVVGYD0UEB/2dmTwJ/9p//yT/OAIqAs8zsJeAjIBP4h3NuXdt8BBHpCPJLqhjcsxNmXq3lmxMHsWVXFQ/8ey1jBnRjzIDuDV7zh28dxms/OJYumWl1zg/J7sS2XdWEwpE2iT3WzrIAlcFwwtY4i0qmRFME/B0YBVyF1+fyGnCqc67QOfcccBNeH8x3gJ3ANc65dwGcc2V4TWwfAWcDOcATwHVt+zFEpL3bsquKIdlf1QJOHzuAzLQUiiuCnH/E4EZf0zUzjUMHNkxAQ7M7E4o4diRgX5oNhYldtTkqbe+XtA0/UewxKTjn7gfu30P5p3jzaERE9ll+SSWHDelR+7xrZhqnje3PO59v49yJg/bwyoaiCWtzcSWD/VFobSXPn/+TqJ01o5Im0YiIJIOKQIiSypraOTBRt505hvMmDmZgj5Yli0RO2txQVEFaSmKHNoMSjYhIHdERZ/VrH0OyO9dpTmuugT2zMEvMpM28ogqGJXDV5qhk6qMREUm46GTNfUkqjclMS6V/t6zE1GgKKxM+4gyUaERE6siPmUPTWob26tTmkzajQ5sTPYcGlGhEROrIL6kkIzWFvl0zW+09EzFpMzq0OZFrnEUp0YiIxNhSUsWgnll1Zv7vryHZndi+u23n0mz0R5ypRiMikmTyS6oY3IrNZuAlmnDEsa20bfalqQlHmOevJJ2TBH00GnUmIhJjy64qThndr1XfMzqwIL+kiqG94vOLPxJxfLK+iDc/28q7y7ezq7KGQT2y2nzuTmOUaEREfNU1YW/nzFau0QyNTtosqeQYerfqe0f95o3lPPPJJjpnpHLaof05Z8IgThjVJ+FDm0GJRkSk1tY4jDgDGNAjixT7atJmJOK4e9Zq/r2qgHsunsjoAfu34/zM3M0888kmrj42h1vPGEOnjNS9v6gNJT7ViYgkiaYma+6vjLQUBnTPIr+kkppwhJ/+YxkPfrCWDYUVfOuhecxeuWPvb9KE5VtK+fVryzl2ZG9+ddahSZdkQIlGRKRWtMbR2k1n4PXTrNlRzjVP5fLKp1u45fRR/PunJzKib1eu/Xsuj360rnYXz+YqqQjyvWcW07tLBvdfckRSNJM1JjmjEhFpoZKKICu37d6v99hSUkVqijGge+uvDTYkuxOfbyll3tpC/nzB4fzo1EMY2KMTM284hrPGD+SP767ilpnL2F1d06z3C0ccN724lILdAR6+fBK9W3HeT2tTohGRDuEvs77kvP+bR3FFcJ/fY8uuKgZ0z4pL
zWDMwG50Sk/l8SsncdGUobXnO2Wk8uB3juAnp43itaVb+Nrdc/jXigabAtdREQhxy8ylzFm9k9vPHcvEoT1bPd7WpEQj0kF9sKqAZxfkJTqMNrM4bxeBUISXF+c36/q8ogpyNxbXOZdfUtnqAwGirj1uBAt/dSqnjOnfoMzMuOm0Q3j1+8fSs3M61z+9mO8/u5iCsobzblZt3825D87l9WVbueX0UXxn6rC4xNuaNOpMpAP6LH8XNzyzmBSDiyYPJT1J2+5bS2UwxJfbvWazZxfkcc1xBzU5s39zcSUP/HsNLy/ZgnOO5687mqNGeEOOt5RUcfTB8Rl+nJJidMtK3+M1E4b25M0fHcdjc9Zz3+w1vP9FAROH9WTawb05ZkRv1hdWcMcbK+jeKZ1nrz2KaQf3iUusrU2JRqSD2VkW4IanF+OcozrkWLltN4cPSe6mlf31eX4pEQfnTBjEm8u2Mn9dEccdUveXcFF5gHveX82LizZjZlxx9HA+/LKAm19cyj9vOoHOmals313daqs276v01BR+cPJIzjpsIM8v3MTH64q4b/Ya7n1/DQDHH9KHuy+aSN9uydsnU58SjUgHEgxFuPGZxZRUBnnk8klc81Qui/NKOnyiWZa/C/A2J5u7ZifPLsirk2jCEccNTy9mWf4uLpo8lB+eMpKBPTqxbPNgLnh4Pr945TNuO/NQIg6GJMFMeoCD+nThl2cdCkBpZQ0LNhRRVRPmnMMHteo6bG1BiUakA7njzRXk5pXwwKVHcOqh/RnUI4vFeSVcfexB+/3ey7eUsq20mtPHNuxjaAvlgRDF5UGGNbJ219LNuxiS3YlBPTtx0eShzJi7gR27q+nvjx57bM56cvNKuOfiCZx/xJDa100Y2pOfnjGaP727iow0r3kxXn00+6NH53S+Nm5AosPYZx274VbkADIzdzPPLdjE9048mHMmePvaHzk8myV5Jfv93p/l7+LiRz/mxmcWU1Qe2O/32xd/eGclZz/wH6prwg3Klm0urR15denUYYQjjhcXbQbgi627uXvWl5w5fgDnTRzc4LXXHz+C40b24fWlW4H4zKE50CnRiHQAoXCE+95fw6Th2fzsjNG15ycNz2ZraXXt0iqx7p61mm8/PJ9IZM+TBNcWlDP9b4vokplGKOJ49dMtrR7/3jjn+GBVAWXVodpViaMKyqrZsquqNtHk9OnC8Yf04fmFm6gKhvmvmUvp0SmD359/GGYNm5xSUoy7L5pAry4ZmMHAHko0rU2JRqQDeH9lAVt2VXH9CSNIjWm/nzQ8G4Alm+rWasIRx3MLNpGbV8L7e1j+ZOuuKq786wJSDGbecAwTh/bkxUWbWzyDfX+tLSivXWL/XyvqxrtscylAnbkklx01nG2l1Vz+1wWs2l7Gn799GL26ZDT5/v26Z/HoFZO49YwxtU1o0nr0HZW4ue2Vz7j1pWWJDuOA8NT8jQzu2YnTDq3bf3LowO5kpaewuF7z2cINxRSWB0hLMR5pYumT4oogV/x1AWXVIZ767lRy+nThoslDWVNQzrL80hbH6JyjoKya4oogu6trqAqG91qbivpo9U7AS5zvr9xBOOZ1SzeXkJpijB/co/bcqYf2o3/3TBbnlXDp1KGNzl2pb0pOL2486eAWfippDiUaiYt1O8t5YdFmZubms3xLy38pCeyqDLKpaO/7zK/avpuP1xdxxTHD69RmwBsqO2FIzwb9NO98vo2s9BR+esZolmzaxaKNdcsjEcePn/+UzSVVPH7VZMYN8n6JnzNhIFnpKbX9H81VWlXDdx5fwNTfz+bI/5nF4Xf8i0N/809Ou+ejRpv16vvPmkJG9OnC1cfmUFQRrJM4l20uZcyAbmSlf7WYZHpqCjedOorJw7P59dljWxSrtD4lGomLv87dQHpqCt2y0rh/9ppEh9PuRCKOK59YyNn3/4eC3XvelfGp+XlkpqVw8eShjZZPGp7Niq27qQp6nejhiOPd5ds5ZUw/rjomh15d
Mnj0o3V1XvP3jzcyd20ht58zlqNHfDWBsVtWOmcdNpA3l22tfb+92V5azcWPfsyijcX85LRR/Pbccfz67EP56ddGsXN3gMtmLGh0BnxUdU2YBRuKOGFUX04a3Y+M1BTe85doiUQcyzbvanQJlu8cNYyXbpxGl0wNrk00JRrZZ2XVNZQHQg3OF5UHeHlxPt86YjDXHHcQ//piB19s3b/FDptj+ZZSlm7eFff7tIV3lm/js/xSygIhfvvWF01eV1pZw2ufbuG8iYPJbqIPYtLwbEIRx2f+XJNFG71ms7MOG0injFSuOiaH2asK+HJ7GeD1h/zx3VWcPLpvo8ubXDx5KOWBEO98vm2vn2NtQRkXPDyfzcWV/O3qKdx02iFcNS2Ha48fwQ9POYQnvzuFHburuWLGwibXKMvdWEJ1TYQTRvWha2Yax47szb++2I5zjvWF5ZQFQkm/1teBTolG9sncNYWcdNeHnPPA3AarzT79SR6BUIRrjz+Iq6cdRLfMNB74d3xrNa8v3cL5D83jihkLEjb8trUEQxHueu9Lxgzoxk9OG8Xbn23jg1UFjV47M3czVTVhrpqW0+T7HTHMGxCw2B8Q8PZnXrPZKWO87YqvPGY4ndJTeXTOOmrCEW6ZuZROGancecHhjY7SmnpQL3J6d2Zm7p6bzxbnlXDBwx8TCEV48YZjOP6Qvg2umTS8FzOunMyGogqufGIBpVUNVy6es2Yn6alWW7M6Y9wANhdXsXJbGUsbGQggyUeJRlokEnHcP3sNVzyxgO6d0tlcXMktM5fVdupW14R5+uM8ThnTj5H9utGjczrTj83h3eXba/9i3ptQOMKKraU8t2AT89cV7vX6x+es56YXljJ2UA8qa8K1S3XE24bCirjUoF5YtIm8okp+fuYYbjzpYEb268qvX1tOZbBu7TEccfz9k41MzenF2EHdm3y/Xl0yGNG3C0vySmqbzU4e3Y/OGV6TUnaXDC6eMpQ3lm7lN6+vYFl+Kb8/7zD6NbFUvplx4eShLNhQzMbCikavmb+2kMtnLCC7czqv3DitTkd9fdNG9uHRyyfx5fYyrnsql1A4Uqd8zuqdTB7eqzbe08b2xwz+9cV2lm4uoWtmGgf37drk+0viKdFIs5VUBPlVen29AAATlUlEQVTuU4u4e9Zqzp0wiLd+dBy/POtQZn2xg0fmeG38ryzZQlFFkGuP/2om+jXHHUSXjFTu30ut5qXF+Vz4yHzG3/EeZ98/l1+++jnT/7aoycEEkYjjd299we/fWclZhw3gxeuP5rKjhvHcwk2s2dG8pLavtpVWceEj8zn/oXk8MHtNs0dP7U15IMT9s9dw9IhenDSqLxlpKfzh/MPYsquK++ol0H8u387m4qo91maiJg3LZnFeSe1os7MPH1in/NrjD8IBzy/cxHkTBzUor++CI4eQYvCPxQ1rNR+sKmD6k4sY1qszM793TKMz+es7eUw/7rzgcBZuLOavczfUni/YXc2q7WWcMOqr2lCfrplMHp7Neyt2sGxzKYcP6dHulmQ50CjRJLEvt5exYH0RgVDzOl33ZvWOMlY38Qs4GIrw5LwNvLQ4v9GhrpuKKjnvoXnMX1vE/5w3nnsvnkiXzDSuPjaHcyYM4n/f+5L/rNnJjLnrGT+4O8fEdCD37JzBVdNyeOfzbU0mgCfmbuCn/1hGWXWI70wdzn2XTOTNHx5H7y4ZfP/ZJQ2aVAKhMDe/uJQZczcwfVoOD1x6JFnpqdx82ig6Z6Tyu7dX7sd3as8CoTDfe2YJVcEwZ4wdwF9mreb6p3P3umGVc26v809m/Gc9heVBfnHmobXNVlMP6sUlU7xlVT5YVcBjc9ZxzgNz+cFzSxjaqxNfG7f3obuThmdTUlnDQx+urdNsFjUkuzOXTh3K8N6d+e254/f6fgN6ZHHy6H48+tF6rvt7Lu9+vo1AKMw/l2/j+qdzGdW/Ky9cfzT9ujV/A7HzjxjM18b25y+zVrNuZzkAc9Z4NdoT
RtVdIPOMcQNYuW03K7aWqtmsHbC2nniVjCZPnuxyc3Nb/Lo73ljBgB5ZTJ+WU2doZdSWXVWEwhGG9ercaFv3nnyWv4tLHvuEymCYzLQUJudkM+3gPnx9/IB9aiZ4eXE+t73yOcFwhG9OHMQtp4+u/Utz3tpCfvP6ctbt9JpBvj1pCL87b3ztZ1q5bTdXPrGQmnCEv141pXYSYFRFIMT5D81jY1ElwVCE+y6ZyDfrLfVRXBHkuDv/zaEDu3PXtw9nRMxneGLuBv77rS84c/wA7r/0iDpL2i/OK+HiRz/m5DH9eOyKSZgZZdU13PD0YuavK+LWr4/mxhMPrvP9fXzOen7/zkqevHoKJ42u+wt1fznn+MXLn/Ni7mYeufxIzhg3gL9/nMf/vPUFQ3t15ienj6K0qoYdpdVs313NzrIARRUBisqDFJUHGT2gG09Mn9Loyrs7ywKcdNcHnDi6Lw9dNqlO2a7KIKf+5SOK/A7zw4f04JzDB/HNIwY165f5mh1lnH7PHADOHD+Ahy+f1OAa5xw1YdfsCYsFZdU8Pmc9ry/dSkFZgO5ZaVQEw0wY0oO/XT2VHp32vCR+U+95+t1zGNmvKzNvOIafvLiU+esKWfjL0+rUWjYVVXLCXR8A8NgVk9r1OmDtmZktds5N3ut1SjT7lmhC4Qjfe2YJ76/cweCenbj166M55/BBmMGijSXM+M96Zq3cgXPQt1smU3KymTS8F5lpKWworGBDYQUbCyuYMLQn//3NcXX2qdhUVMm3Hp5HVnoqvzhzDIvzSvh4XRGrtpeRYnDhpKHcdNohDGrGKrPhiOPP763i0Y/WM+3g3kwY2pO/zdtAKOy4dOowiiuCvP35Nob16sxvvjGWz7eUct/sNYwf3J2HL5vEttJqrnlqEV0y0nj6mqkc0r9bo/dZv7Ocbz44j25ZaXx068mN7n/y0uJ8bn99OYFQhMuOGsaPTz2E15dubTLJREUT0S/OHMO3jhjMVX9bxJodZdx5weFcMGlIg+sDoTBfu2cOGakpvHvT8XvcLbG0qoY1O8pYvaOc1TvKyExL4fSx/TlyWHajzTHPLsjjV68u5wcnH8zPzhhTe37RxmK+/+wSdpZ5AxFSU4y+XTPp1z2TPl0z6d0lg+6d0nluwSYG9czi+euOrtMHUlpZw80vfsqcNYXM+skJdRJx7D0+3VTCGeMGMLx3lyY/U2MiEcfE//4Xu6tDPHDpEbVrobWGcMQxf10hry7ZQjAc4c4LDt+vIcWvLMnnv2Yu49dnH8pDH67jxFF9uefiiQ2u+/q9c1i1vYyFvzy1yf4kiS8lmhbY1xoNeJ2ev39nJSu27ubwIT0wYFl+KT07p3PF0cPp3z2L3I3FLNpYwhZ/YlpWego5vbswqGcnPlq9k2G9OvPw5UcyZkB3iiuCXPDwfEoqg7x847Q6tZeC3dU88tF6nvkkDwymT8th8vBs1hSU+81i5WSkpTBxSA8mDO3J2EHd+d/3vuT9lQVcdtQw7jh3HOmpKRTsrua+2Wt4YdFm0lKMH5w8kutPGFFbg5m9cgc3v7iU1BSjKhhmcHYnnr7mKAbvJbGtLSgDjJH9mq5x7SwLcN/s1Ty/cDOZaSlUBsN8fdwAHvhO40kGvL+0f/j8p7z7+Tb6dctid3UND18+iRNHNRzFFPXP5dv43jNLuGjyEDpnpJFfUkV+SSUllUFqwo6acISacITqmq86nrtkpFITdgTDEfp2y+RrY/szYWhPUvzaUll1DX94ZyXTDu7DE9OnNJgcWVpVQ15RBf27Z9Gna2aDcvBm5F/9t4X0657Fc9cdxcAenZi3tpBbZi6jsDzAr88+lOmtsNJyY656YiGfrC9iyf87PannljjnuOapXD5avZNwxDVYcTnqhYWbmPXFDv46fUoCohQ4QBONmWUBdwEXA92AJcB/OecW7Ol1+5NowPtr8bWlW7jn/dWkp6bw3WMP4oIjh9Apo25z2vbSasLOMbB7Vu1fywvWF/HD
5z+lrLqG288Zx8zczazYupvnrj2KyTm9Gr1ffkkl976/hleW5BPtgx7csxOj+nelqibMZ/mlVPqT6VJTjNvPGcuVx+Q0eJ/tpdWkpRp9ujZsxtlQWMH3n11CVnoKM66cTO9GrtkfawvK+N/3VtMtK40/fOuwve4AWR4I8c0H51JaVcMT06fsdX8V5xyXzVjA/HVFdMlIZWivzgzJ7kTvLpmkpxnpqSmkp6aQ3TmD0QO6Mqp/Nwb16ERFMMQHX+7kveXb+eDLgtrvY9SIPl149fvH0qNzy5uFohbnFXPVE4vo1SWDk0f35amP8xjRtwv3XXwEhw1penTW/lq5bTdbd1Vx6qGJWea/JbaXVnP6PR9RVh1i0a9Oa1ebfB1IDtRE8whwA7Dcf1wMlAMjnHNNjpPd30SzvwrKqvnx85/yyfpizODhy47k6+P3POoHYGNhBcWVQUb170bXmL9QwxHH2oJyluXvYlT/bvvcWRr92Whp/1K8lAdCRJyj+162w40KhiKUB0Jkd07fp89QXROubQqL6tc9k8y0hv1xLbV0867adcSuOHo4vzzr0AZ/mBzoZq/cwaebdvHTmNWoJbkccInGzPoB+UAqMNA5V2BmTwOXA791zt3R1GsTnWjA6/OZMXcDA3tkNehIl45p/c5yiiqCTGmi5iqS7JqbaJK3obblxgHpwEbnXHQadS5eomnQk2hm1wPXAwwb1nCZjbaWlprC907UyrEHkhF9uzKi6W4mkQ6jI82jiTY8l8eci05bbjD20Tn3mHNusnNuct+++t8uIhIvHSnRRHdDih3yFP16exvHIiIivo6UaL4AaoBhZhat3UTHPWr3LRGRBOkwicY5twN4Eu8zzTazF4BL8ZrSHkxgaCIiB7SONBgA4Ca8Ws1FwEjgE+AW59zOhEYlInIA61CJxjlXBfzAf4iISBLoME1nIiKSnJRoREQkrjrMygD7w8x2Ann7+PI+wN63gUwcxbd/FN/+UXz7J9njG+6c2+tERCWa/WRmuc1ZgiFRFN/+UXz7R/Htn2SPr7nUdCYiInGlRCMiInGlRLP/Hkt0AHuh+PaP4ts/im//JHt8zaI+GhERiSvVaEREJK6UaEREJK6UaPaBmWWZ2QNmVmBmVWY2z8yOSmA8N5vZZ2YWNjNnZnfUK7/QzFaYWcDMNprZrW0c3wwzW2lm5WZWZGbvmNn4ZInRzJ4ysy3+vQvN7J9mdkRM+Q/MbJ1f/qWZXdVWsdWL81L/39eZ2b3JFJ+ZfRgTW/SxPMliPN/MFvn/Z0vNbK6ZZftlifz5O6mR7130MT3R8bUK55weLXwAjwAO+Bx4HogAu4E+CYrnaeBDYKMf1x0xZcf48ZUBT+Ftd+2AG9owPgd8DDwObPCf5wNZyRCj/717DngY+NK/d55fdon/vABvdfBi//kZbfxvPAQowVs01gH3Jll8H0bjinn8PFlixFvJ3QHVwAvADOAzYHAS/PyNrPd9m+Hf3wHHJTq+VvmMiQ6gvT2AfkAQCAP9/HNP1/8Fn6DYXmsk0UTP3eI/P9V/vrEN45oU83VOzH+iI5Mlxpj4jvTvHcbbGnyp//wCv/wa//mHbRiTAbOBFf4vydhEk/D4/Pt+CLgmyhIao//92+Tf86RGypPm58+//4/8+y9Oxvj25aGms5Ybh/cLaJNzrsA/l+sfJyYmpD2KNgHl1jsON7OebRGAc25xzNMM/xgGtvlfJzxGM/uhmT2EV0MF+Avef+ZoE1/92Nry3/pmvL9sL8P7ixwAM0sjOeKrZWYlZrbLzGab2ZQkifEQYChQBdzqN+GuNbPoKu8J//mLMjMDfuw/jTaPJk18+0qJpuWiu3eWx5yr8I8D2jiW5qgfb0VMWZvGa2Zdgb/5T+92zkUTTTLE+G3gRmAUXtPEPLx1plKbiK2HmWXFOyi/L+uPwG+cc0vrFSc8vhhlwFvAi3jrBp4CvJckMfbxj52AEcBMvCazB83sPJLj5y/qG3hNadvwvpeQ
XPHtkw61H00b2eEfu8aci369vY1jaY4dwDC+ijE27jaL18z6Au8Ak/H6an4eU5zwGJ1zJ/m/9M4AXgFewvsPH8b7RdkVKIqJrdQ5V93Ye7WyC/BqgSea2fHABP/8uXh/oSc6vqhzXbSdyiwDWA0MB05PghhjNz68wjm3yMyqgO/jfR8T/vMX42b/+LBzLuh/nUzx7RPVaFruC7wO2WFmFv1LY4p/XJaYkPYo+lfwVP8YjXWTc25XWwRgZsOBuXhJ5o/Oueujv5QSHaOZdTKzVAD/l94/8f5yTMP763dFE7G11b+1+Y8zgbPxBgUAHITXSZzo+DCzzsDAJoqDJD7GPLzBOo0pJwn+jwCY2WF4NcFqvAFHUUkR335JdCdRe3zgLQvhgOV4nbPRESF9ExTPtXijeaIdnkv95+cBx/rxleONWNniX3NjG8YXvWcedUfXTPXLExYjcBKw1f93fNj/N42OkOoBfCfm+ZN4I78ccGaC/q2fpO5ggITHhzfAIwC8i/cLcpkfw3a8ZqtkiPG3/j1XAk/g1QZDwFHJ8H/Ej/Gv/n1n1DufFPHt12dLdADt8YHX1vt/eFXyamA+cEwC44n+8qn/uMMvvxivJhb0k9Ev8JcfaqP4GovNAdNjrklIjHh9Mh/iNekE/f/EM4HxMdf8GFjvl68GvpsE/9b3Jkt8QDe85tB1/i/w7cCrwLgkijENr69rG14fxyLgrET//MXcv4//vXOxP3vJEt/+PrTWmYiIxJX6aEREJK6UaEREJK6UaEREJK6UaEREJK6UaEREJK6UaEREJK6UaERayN8X5KQE3XuYvyhk6t6vFkkOmkcjso/8DeZGOucuj+M9NgLXOufej9c9ROJNNRqRBPGX0Bfp8JRoRFrI30r3G8AvgYv9pqxlflkPM/urmW3zt4f+XbSZy8ymm7ft9z1mVgTcYWYHm9m//S2uC83s2egeI2b2NN6qvW/697jVzHL8LX7T/GsGmdkbZlbs77FyXUycd5jZTDP7u5mV+U1+k2PKf+7HWOZvr3xqm30T5YCiRCOyb6qBPwAvOue6Oueiy/c/ibdY40i8Dau+hrfoadRReGt+9Qd+j7cy8x+BQcCheBt03QHgnLsCb12rc/x7/LmROF7A2z9nEN6eOn8ws1Niys/1r+kJvAE8CGBmo4EfAlOcc93wtkfYuE/fCZG9UKIRaSX+thFnATc75yqctwPrPcAlMZdtdc494JwLOeeqnHNrnXOznHMB59xO4G7gxGbebyjeyr4/d85VO29jtBnAlTGXzXXOveOcC+NtOR5NiGEgExhrZunOuY3OuXX78fFFmqQ2YpHWMxxvm+9t3o68gPfH3OaYa2K/jian+4Dj8VZBTsFbRr85BgHFzrmymHN5ePv+RMVujFUJZJlZmnNurZndjFd7Gmdm7wH/5Zzb2sx7izSbajQi+67+kM3NePuy9HHO9fQf3Z1z4/bwmj/45w5zznUHLsdrTmvq+lhbgV5m1i3m3DC8rQ72HrxzzznnjsNLkA64szmvE2kpJRqRfbcDyDGzFADn3DbgX8BfzKy7maX4nf17agrrhrehVamZDQZ+1sg9RjT2QufcZry9kP5oZllmdjhwDfDM3gI3s9FmdoqZZeL1N1Xhba4l0uqUaET23T/8Y5GZLfG/vhLIwNukqgR4iaa3OQZv58cjgVLgbeCVeuV/BH5tZrvM7KeNvP5SvB0ut+JtNnZ7M+fcZAJ/Agrxmtf6Abc143UiLaYJmyIiEleq0YiISFwp0YiISFwp0YiISFwp0YiISFwp0YiISFwp0YiISFwp0YiISFwp0YiISFwp0YiISFz9fyFF/XcZmfLSAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import matplotlib\n",
    "from matplotlib import pyplot as plt\n",
    "# 'normal' is not a font family matplotlib can resolve (it triggers\n",
    "# findfont warnings and a fallback); use the generic sans-serif family.\n",
    "font = {'family' : 'sans-serif',\n",
    "        'weight' : 'bold',\n",
    "        'size'   : 12}\n",
    "\n",
    "matplotlib.rc('font', **font)\n",
    "plt.plot(scores)\n",
    "plt.xlabel('iterations')\n",
    "# Trailing semicolon suppresses the Text(...) repr in the cell output.\n",
    "plt.ylabel('scores');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2020-06-20 14:10:10,156\tINFO resource_spec.py:212 -- Starting Ray with 8.84 GiB memory available for workers and up to 4.44 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).\n",
      "2020-06-20 14:10:10,685\tINFO services.py:1170 -- View the Ray dashboard at \u001b[1m\u001b[32mlocalhost:8265\u001b[39m\u001b[22m\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "== Status ==<br>Memory usage on this node: 1.2/15.6 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 3/6 CPUs, 0/1 GPUs, 0.0/8.84 GiB heap, 0.0/3.03 GiB objects<br>Result logdir: /home/ben/ray_results/PPO<br>Number of trials: 1 (1 RUNNING)<br><table>\n",
       "<thead>\n",
       "<tr><th>Trial name           </th><th>status  </th><th>loc  </th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td>PPO_CartPole-v0_00000</td><td>RUNNING </td><td>     </td></tr>\n",
       "</tbody>\n",
       "</table><br><br>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[2m\u001b[36m(pid=5356)\u001b[0m 2020-06-20 14:10:14,743\tWARNING compression.py:16 -- lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.\n",
      "\u001b[2m\u001b[36m(pid=5356)\u001b[0m 2020-06-20 14:10:15,372\tINFO trainer.py:421 -- Tip: set 'eager': true or the --eager flag to enable TensorFlow eager execution\n",
      "\u001b[2m\u001b[36m(pid=5356)\u001b[0m 2020-06-20 14:10:15,372\tINFO trainer.py:580 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.\n",
      "\u001b[2m\u001b[36m(pid=5356)\u001b[0m 2020-06-20 14:10:18,952\tINFO trainable.py:217 -- Getting current IP.\n",
      "\u001b[2m\u001b[36m(pid=5356)\u001b[0m 2020-06-20 14:10:18,962\tWARNING util.py:37 -- Install gputil for GPU system monitoring.\n",
      "\u001b[2m\u001b[36m(pid=5353)\u001b[0m 2020-06-20 14:10:19,778\tWARNING compression.py:16 -- lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.\n",
      "\u001b[2m\u001b[36m(pid=5354)\u001b[0m 2020-06-20 14:10:19,776\tWARNING compression.py:16 -- lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.\n",
      "Result for PPO_CartPole-v0_00000:\n",
      "  custom_metrics: {}\n",
      "  date: 2020-06-20_14-10-30\n",
      "  done: false\n",
      "  episode_len_mean: 22.083333333333332\n",
      "  episode_reward_max: 61.0\n",
      "  episode_reward_mean: 22.083333333333332\n",
      "  episode_reward_min: 8.0\n",
      "  episodes_this_iter: 180\n",
      "  episodes_total: 180\n",
      "  experiment_id: bfb3f5a93ed148fe88672bb6128750de\n",
      "  experiment_tag: '0'\n",
      "  hostname: server\n",
      "  info:\n",
      "    grad_time_ms: 5836.02\n",
      "    learner:\n",
      "      default_policy:\n",
      "        cur_kl_coeff: 0.20000000298023224\n",
      "        cur_lr: 4.999999873689376e-05\n",
      "        entropy: 0.6624745726585388\n",
      "        entropy_coeff: 0.0\n",
      "        kl: 0.031605158001184464\n",
      "        model: {}\n",
      "        policy_loss: -0.04275025054812431\n",
      "        total_loss: 77.00390625\n",
      "        vf_explained_var: 0.13094556331634521\n",
      "        vf_loss: 77.04032897949219\n",
      "    load_time_ms: 59.289\n",
      "    num_steps_sampled: 4000\n",
      "    num_steps_trained: 3968\n",
      "    sample_time_ms: 5405.416\n",
      "    update_time_ms: 539.935\n",
      "  iterations_since_restore: 1\n",
      "  node_ip: 127.0.0.1\n",
      "  num_healthy_workers: 2\n",
      "  off_policy_estimator: {}\n",
      "  optimizer_steps_this_iter: 1\n",
      "  perf:\n",
      "    cpu_util_percent: 49.833333333333336\n",
      "    ram_util_percent: 13.772222222222222\n",
      "  pid: 5356\n",
      "  policy_reward_max: {}\n",
      "  policy_reward_mean: {}\n",
      "  policy_reward_min: {}\n",
      "  sampler_perf:\n",
      "    mean_env_wait_ms: 0.08393468087358857\n",
      "    mean_inference_ms: 1.1483905669956762\n",
      "    mean_processing_ms: 0.23641136361566414\n",
      "  time_since_restore: 11.896221160888672\n",
      "  time_this_iter_s: 11.896221160888672\n",
      "  time_total_s: 11.896221160888672\n",
      "  timestamp: 1592658630\n",
      "  timesteps_since_restore: 4000\n",
      "  timesteps_this_iter: 4000\n",
      "  timesteps_total: 4000\n",
      "  training_iteration: 1\n",
      "  trial_id: '00000'\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/html": [
       "== Status ==<br>Memory usage on this node: 2.2/15.6 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 3/6 CPUs, 0/1 GPUs, 0.0/8.84 GiB heap, 0.0/3.03 GiB objects<br>Result logdir: /home/ben/ray_results/PPO<br>Number of trials: 1 (1 RUNNING)<br><table>\n",
       "<thead>\n",
       "<tr><th>Trial name           </th><th>status  </th><th>loc           </th><th style=\"text-align: right;\">  iter</th><th style=\"text-align: right;\">  total time (s)</th><th style=\"text-align: right;\">  ts</th><th style=\"text-align: right;\">  reward</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td>PPO_CartPole-v0_00000</td><td>RUNNING </td><td>127.0.0.1:5356</td><td style=\"text-align: right;\">     1</td><td style=\"text-align: right;\">         11.8962</td><td style=\"text-align: right;\">4000</td><td style=\"text-align: right;\"> 22.0833</td></tr>\n",
       "</tbody>\n",
       "</table><br><br>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Result for PPO_CartPole-v0_00000:\n",
      "  custom_metrics: {}\n",
      "  date: 2020-06-20_14-10-39\n",
      "  done: false\n",
      "  episode_len_mean: 41.06\n",
      "  episode_reward_max: 131.0\n",
      "  episode_reward_mean: 41.06\n",
      "  episode_reward_min: 8.0\n",
      "  episodes_this_iter: 90\n",
      "  episodes_total: 270\n",
      "  experiment_id: bfb3f5a93ed148fe88672bb6128750de\n",
      "  experiment_tag: '0'\n",
      "  hostname: server\n",
      "  info:\n",
      "    grad_time_ms: 5649.65\n",
      "    learner:\n",
      "      default_policy:\n",
      "        cur_kl_coeff: 0.30000001192092896\n",
      "        cur_lr: 4.999999873689376e-05\n",
      "        entropy: 0.6107953786849976\n",
      "        entropy_coeff: 0.0\n",
      "        kl: 0.019298221915960312\n",
      "        model: {}\n",
      "        policy_loss: -0.028672633692622185\n",
      "        total_loss: 240.02420043945312\n",
      "        vf_explained_var: 0.1258750855922699\n",
      "        vf_loss: 240.0470428466797\n",
      "    load_time_ms: 30.972\n",
      "    num_steps_sampled: 8000\n",
      "    num_steps_trained: 7936\n",
      "    sample_time_ms: 4155.977\n",
      "    update_time_ms: 272.502\n",
      "  iterations_since_restore: 2\n",
      "  node_ip: 127.0.0.1\n",
      "  num_healthy_workers: 2\n",
      "  off_policy_estimator: {}\n",
      "  optimizer_steps_this_iter: 1\n",
      "  perf:\n",
      "    cpu_util_percent: 56.40833333333334\n",
      "    ram_util_percent: 14.0\n",
      "  pid: 5356\n",
      "  policy_reward_max: {}\n",
      "  policy_reward_mean: {}\n",
      "  policy_reward_min: {}\n",
      "  sampler_perf:\n",
      "    mean_env_wait_ms: 0.08428038305329742\n",
      "    mean_inference_ms: 1.1390618851969072\n",
      "    mean_processing_ms: 0.22680079555299826\n",
      "  time_since_restore: 20.280224800109863\n",
      "  time_this_iter_s: 8.384003639221191\n",
      "  time_total_s: 20.280224800109863\n",
      "  timestamp: 1592658639\n",
      "  timesteps_since_restore: 8000\n",
      "  timesteps_this_iter: 4000\n",
      "  timesteps_total: 8000\n",
      "  training_iteration: 2\n",
      "  trial_id: '00000'\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/html": [
       "== Status ==<br>Memory usage on this node: 2.2/15.6 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 3/6 CPUs, 0/1 GPUs, 0.0/8.84 GiB heap, 0.0/3.03 GiB objects<br>Result logdir: /home/ben/ray_results/PPO<br>Number of trials: 1 (1 RUNNING)<br><table>\n",
       "<thead>\n",
       "<tr><th>Trial name           </th><th>status  </th><th>loc           </th><th style=\"text-align: right;\">  iter</th><th style=\"text-align: right;\">  total time (s)</th><th style=\"text-align: right;\">  ts</th><th style=\"text-align: right;\">  reward</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td>PPO_CartPole-v0_00000</td><td>RUNNING </td><td>127.0.0.1:5356</td><td style=\"text-align: right;\">     2</td><td style=\"text-align: right;\">         20.2802</td><td style=\"text-align: right;\">8000</td><td style=\"text-align: right;\">   41.06</td></tr>\n",
       "</tbody>\n",
       "</table><br><br>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Result for PPO_CartPole-v0_00000:\n",
      "  custom_metrics: {}\n",
      "  date: 2020-06-20_14-10-47\n",
      "  done: false\n",
      "  episode_len_mean: 65.45\n",
      "  episode_reward_max: 200.0\n",
      "  episode_reward_mean: 65.45\n",
      "  episode_reward_min: 13.0\n",
      "  episodes_this_iter: 39\n",
      "  episodes_total: 309\n",
      "  experiment_id: bfb3f5a93ed148fe88672bb6128750de\n",
      "  experiment_tag: '0'\n",
      "  hostname: server\n",
      "  info:\n",
      "    grad_time_ms: 5578.294\n",
      "    learner:\n",
      "      default_policy:\n",
      "        cur_kl_coeff: 0.30000001192092896\n",
      "        cur_lr: 4.999999873689376e-05\n",
      "        entropy: 0.5686373710632324\n",
      "        entropy_coeff: 0.0\n",
      "        kl: 0.012524282559752464\n",
      "        model: {}\n",
      "        policy_loss: -0.018259910866618156\n",
      "        total_loss: 625.0143432617188\n",
      "        vf_explained_var: 0.06953061372041702\n",
      "        vf_loss: 625.0289916992188\n",
      "    load_time_ms: 21.257\n",
      "    num_steps_sampled: 12000\n",
      "    num_steps_trained: 11904\n",
      "    sample_time_ms: 3753.686\n",
      "    update_time_ms: 183.465\n",
      "  iterations_since_restore: 3\n",
      "  node_ip: 127.0.0.1\n",
      "  num_healthy_workers: 2\n",
      "  off_policy_estimator: {}\n",
      "  optimizer_steps_this_iter: 1\n",
      "  perf:\n",
      "    cpu_util_percent: 56.475\n",
      "    ram_util_percent: 14.0\n",
      "  pid: 5356\n",
      "  policy_reward_max: {}\n",
      "  policy_reward_mean: {}\n",
      "  policy_reward_min: {}\n",
      "  sampler_perf:\n",
      "    mean_env_wait_ms: 0.0847291427916976\n",
      "    mean_inference_ms: 1.139511109381423\n",
      "    mean_processing_ms: 0.22259702333722806\n",
      "  time_since_restore: 28.677826404571533\n",
      "  time_this_iter_s: 8.39760160446167\n",
      "  time_total_s: 28.677826404571533\n",
      "  timestamp: 1592658647\n",
      "  timesteps_since_restore: 12000\n",
      "  timesteps_this_iter: 4000\n",
      "  timesteps_total: 12000\n",
      "  training_iteration: 3\n",
      "  trial_id: '00000'\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/html": [
       "== Status ==<br>Memory usage on this node: 2.2/15.6 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 3/6 CPUs, 0/1 GPUs, 0.0/8.84 GiB heap, 0.0/3.03 GiB objects<br>Result logdir: /home/ben/ray_results/PPO<br>Number of trials: 1 (1 RUNNING)<br><table>\n",
       "<thead>\n",
       "<tr><th>Trial name           </th><th>status  </th><th>loc           </th><th style=\"text-align: right;\">  iter</th><th style=\"text-align: right;\">  total time (s)</th><th style=\"text-align: right;\">   ts</th><th style=\"text-align: right;\">  reward</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td>PPO_CartPole-v0_00000</td><td>RUNNING </td><td>127.0.0.1:5356</td><td style=\"text-align: right;\">     3</td><td style=\"text-align: right;\">         28.6778</td><td style=\"text-align: right;\">12000</td><td style=\"text-align: right;\">   65.45</td></tr>\n",
       "</tbody>\n",
       "</table><br><br>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Result for PPO_CartPole-v0_00000:\n",
      "  custom_metrics: {}\n",
      "  date: 2020-06-20_14-10-56\n",
      "  done: false\n",
      "  episode_len_mean: 97.75\n",
      "  episode_reward_max: 200.0\n",
      "  episode_reward_mean: 97.75\n",
      "  episode_reward_min: 17.0\n",
      "  episodes_this_iter: 28\n",
      "  episodes_total: 337\n",
      "  experiment_id: bfb3f5a93ed148fe88672bb6128750de\n",
      "  experiment_tag: '0'\n",
      "  hostname: server\n",
      "  info:\n",
      "    grad_time_ms: 5547.185\n",
      "    learner:\n",
      "      default_policy:\n",
      "        cur_kl_coeff: 0.30000001192092896\n",
      "        cur_lr: 4.999999873689376e-05\n",
      "        entropy: 0.5512956976890564\n",
      "        entropy_coeff: 0.0\n",
      "        kl: 0.00983278825879097\n",
      "        model: {}\n",
      "        policy_loss: -0.010200484655797482\n",
      "        total_loss: 534.859619140625\n",
      "        vf_explained_var: 0.29166412353515625\n",
      "        vf_loss: 534.866943359375\n",
      "    load_time_ms: 16.359\n",
      "    num_steps_sampled: 16000\n",
      "    num_steps_trained: 15872\n",
      "    sample_time_ms: 3535.112\n",
      "    update_time_ms: 138.885\n",
      "  iterations_since_restore: 4\n",
      "  node_ip: 127.0.0.1\n",
      "  num_healthy_workers: 2\n",
      "  off_policy_estimator: {}\n",
      "  optimizer_steps_this_iter: 1\n",
      "  perf:\n",
      "    cpu_util_percent: 56.958333333333336\n",
      "    ram_util_percent: 14.0\n",
      "  pid: 5356\n",
      "  policy_reward_max: {}\n",
      "  policy_reward_mean: {}\n",
      "  policy_reward_min: {}\n",
      "  sampler_perf:\n",
      "    mean_env_wait_ms: 0.08514448147109328\n",
      "    mean_inference_ms: 1.1413802179293384\n",
      "    mean_processing_ms: 0.2188190214905292\n",
      "  time_since_restore: 37.022998332977295\n",
      "  time_this_iter_s: 8.345171928405762\n",
      "  time_total_s: 37.022998332977295\n",
      "  timestamp: 1592658656\n",
      "  timesteps_since_restore: 16000\n",
      "  timesteps_this_iter: 4000\n",
      "  timesteps_total: 16000\n",
      "  training_iteration: 4\n",
      "  trial_id: '00000'\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/html": [
       "== Status ==<br>Memory usage on this node: 2.2/15.6 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 3/6 CPUs, 0/1 GPUs, 0.0/8.84 GiB heap, 0.0/3.03 GiB objects<br>Result logdir: /home/ben/ray_results/PPO<br>Number of trials: 1 (1 RUNNING)<br><table>\n",
       "<thead>\n",
       "<tr><th>Trial name           </th><th>status  </th><th>loc           </th><th style=\"text-align: right;\">  iter</th><th style=\"text-align: right;\">  total time (s)</th><th style=\"text-align: right;\">   ts</th><th style=\"text-align: right;\">  reward</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td>PPO_CartPole-v0_00000</td><td>RUNNING </td><td>127.0.0.1:5356</td><td style=\"text-align: right;\">     4</td><td style=\"text-align: right;\">          37.023</td><td style=\"text-align: right;\">16000</td><td style=\"text-align: right;\">   97.75</td></tr>\n",
       "</tbody>\n",
       "</table><br><br>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Result for PPO_CartPole-v0_00000:\n",
      "  custom_metrics: {}\n",
      "  date: 2020-06-20_14-11-04\n",
      "  done: true\n",
      "  episode_len_mean: 124.98\n",
      "  episode_reward_max: 200.0\n",
      "  episode_reward_mean: 124.98\n",
      "  episode_reward_min: 17.0\n",
      "  episodes_this_iter: 21\n",
      "  episodes_total: 358\n",
      "  experiment_id: bfb3f5a93ed148fe88672bb6128750de\n",
      "  experiment_tag: '0'\n",
      "  hostname: server\n",
      "  info:\n",
      "    grad_time_ms: 5528.978\n",
      "    learner:\n",
      "      default_policy:\n",
      "        cur_kl_coeff: 0.30000001192092896\n",
      "        cur_lr: 4.999999873689376e-05\n",
      "        entropy: 0.550544798374176\n",
      "        entropy_coeff: 0.0\n",
      "        kl: 0.0035177802201360464\n",
      "        model: {}\n",
      "        policy_loss: -0.005419825669378042\n",
      "        total_loss: 239.95021057128906\n",
      "        vf_explained_var: 0.42236775159835815\n",
      "        vf_loss: 239.95460510253906\n",
      "    load_time_ms: 13.676\n",
      "    num_steps_sampled: 20000\n",
      "    num_steps_trained: 19840\n",
      "    sample_time_ms: 3404.621\n",
      "    update_time_ms: 112.026\n",
      "  iterations_since_restore: 5\n",
      "  node_ip: 127.0.0.1\n",
      "  num_healthy_workers: 2\n",
      "  off_policy_estimator: {}\n",
      "  optimizer_steps_this_iter: 1\n",
      "  perf:\n",
      "    cpu_util_percent: 56.550000000000004\n",
      "    ram_util_percent: 14.0\n",
      "  pid: 5356\n",
      "  policy_reward_max: {}\n",
      "  policy_reward_mean: {}\n",
      "  policy_reward_min: {}\n",
      "  sampler_perf:\n",
      "    mean_env_wait_ms: 0.08513947166998947\n",
      "    mean_inference_ms: 1.142338233105642\n",
      "    mean_processing_ms: 0.21549759755221762\n",
      "  time_since_restore: 45.37456727027893\n",
      "  time_this_iter_s: 8.351568937301636\n",
      "  time_total_s: 45.37456727027893\n",
      "  timestamp: 1592658664\n",
      "  timesteps_since_restore: 20000\n",
      "  timesteps_this_iter: 4000\n",
      "  timesteps_total: 20000\n",
      "  training_iteration: 5\n",
      "  trial_id: '00000'\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/html": [
       "== Status ==<br>Memory usage on this node: 2.2/15.6 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 3/6 CPUs, 0/1 GPUs, 0.0/8.84 GiB heap, 0.0/3.03 GiB objects<br>Result logdir: /home/ben/ray_results/PPO<br>Number of trials: 1 (1 RUNNING)<br><table>\n",
       "<thead>\n",
       "<tr><th>Trial name           </th><th>status  </th><th>loc           </th><th style=\"text-align: right;\">  iter</th><th style=\"text-align: right;\">  total time (s)</th><th style=\"text-align: right;\">   ts</th><th style=\"text-align: right;\">  reward</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td>PPO_CartPole-v0_00000</td><td>RUNNING </td><td>127.0.0.1:5356</td><td style=\"text-align: right;\">     5</td><td style=\"text-align: right;\">         45.3746</td><td style=\"text-align: right;\">20000</td><td style=\"text-align: right;\">  124.98</td></tr>\n",
       "</tbody>\n",
       "</table><br><br>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "== Status ==<br>Memory usage on this node: 2.2/15.6 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 0/6 CPUs, 0/1 GPUs, 0.0/8.84 GiB heap, 0.0/3.03 GiB objects<br>Result logdir: /home/ben/ray_results/PPO<br>Number of trials: 1 (1 TERMINATED)<br><table>\n",
       "<thead>\n",
       "<tr><th>Trial name           </th><th>status    </th><th>loc  </th><th style=\"text-align: right;\">  iter</th><th style=\"text-align: right;\">  total time (s)</th><th style=\"text-align: right;\">   ts</th><th style=\"text-align: right;\">  reward</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td>PPO_CartPole-v0_00000</td><td>TERMINATED</td><td>     </td><td style=\"text-align: right;\">     5</td><td style=\"text-align: right;\">         45.3746</td><td style=\"text-align: right;\">20000</td><td style=\"text-align: right;\">  124.98</td></tr>\n",
       "</tbody>\n",
       "</table><br><br>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import ray\n",
    "from ray import tune\n",
    "from ray.rllib.agents.ppo import PPOTrainer\n",
    "from ray.rllib.agents.dqn import DQNTrainer\n",
    "\n",
    "ray.init(ignore_reinit_error=True)\n",
    "trainer = PPOTrainer\n",
    "\n",
    "# if you run this on colab you might want to set num_workers=2\n",
    "analysis = tune.run(\n",
    "    trainer,\n",
    "    stop={'episode_reward_mean': 100},\n",
    "    config={'env': 'CartPole-v0'},\n",
    "    checkpoint_freq=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
